*==========================================================================================
*                This code replicates the results  of :                                   *
*                                                                                         *
*      MOTHERHOOD AND WOMEN’S SELF-EMPLOYMENT : THEORY AND EVIDENCE FROM NIGERIA          *
*                                                                                         *
*                  by Jean-Louis Bago and Sylvain Dessy                                   *
*                     (jean-louis.bago.1@ulaval.ca)                                       *
*                        @version: Stata 14.1                                             *
*==========================================================================================


set more off
clear all
clear matrix
clear mata

set matsize 5000
set maxvar 32000

append using "C:\Users\Bago\Desktop\BD_EDCC\Data\ndhs8.dta"
rename v208 kidunder5
rename v106 education_level
rename v133 education
replace v502 =0 if v502==2
rename v502 marital_status
rename v013 age_group
rename v012 age
rename v025 area_residence
rename v101 region
rename v3a08e infertility1
rename v203 daughters
rename v202 sons
rename v001 village
rename v212 agefirstbirth
rename v511 Ageat1stUnion


gen oldchildren = daughters+sons-kidunder5

gen secondary=.
replace secondary=1 if education_level>=2
replace secondary=0 if education_level==0|education_level==1

gen primary=.
replace primary=1 if education_level==1
replace primary=0 if education_level==0|education_level>=2

gen noeducation=.
replace noeducation=1 if education_level==0
replace noeducation=0 if education_level>=1

gen infertility2=.
replace infertility2=1 if v605==7
replace infertility2=0 if v605!=7  &  v605!=. 

gen Infertility=.
replace  Infertility=0 if infertility2==0 & infertility1==0
replace  Infertility=1 if infertility2==1 | infertility1==1


gen children=.
replace children=1 if kidunder5>0
replace children=0 if kidunder5==0


gen childmarriage = .
replace childmarriage = 1 if Ageat1stUnion <= 18 
replace childmarriage = 0 if Ageat1stUnion > 18


gen working=.
replace working=0 if v731==0
replace working=1 if v731==2|v731==3|v731==1
label var working " Working "
label define labetype1 0 "not working" 1 "working"
label value working labetype1

gen selfemployment = .
replace selfemployment= 0 if (v719==1|v719==2)
replace selfemployment= 1 if  v719==3
label var selfemployment " self employment" 
label define labtype  1 " self " 0 " wage " 
label value selfemployment labtype
replace working=. if (selfemployment==. & working==1)

label var children " Preschool-age children" 
label var oldchildren "School-age children"
label var marital_status "Married"
label var secondary "At least secondary education"  

gen earlymotherhood=.
replace earlymotherhood=1 if agefirstbirth<18
replace earlymotherhood=0 if agefirstbirth>=18  

gen EmploymentRate=.
local N = _N
forvalues num = 1/`N'{
quiet sum working if (age_group == age_group[`num'] & region == region[`num'])
quiet replace EmploymentRate = r(mean) in `num'
}
gen UnemploymentRate=(1-EmploymentRate)

gen id = _n
local id id

gen touse = !missing(Infertility, working, kidunder5, children,  secondary,  marital_status, age_group, UnemploymentRate, area_residence, region)
drop if touse!=1
tabulate age_group, generate (age_group)
tabulate region, generate (region)
tabulate area_residence, generate (area_residence)
gen smkids=children


gen reform =.
replace reform =1 if age <= 25
replace reform =0 if age >25
gen high_intensity=.
replace high_intensity = 1 if region==1|region==2|region==4|region==5
replace high_intensity = 0 if region==3|region==6
gen reform_high_intensity =  reform*high_intensity


***************************************
*   Table 1 : Summary Statistics      *
***************************************


gen workstatus=.
replace workstatus=1 if selfemployment==0
replace workstatus =2 if selfemployment==1
replace workstatus=3 if working ==0
set more off
preserve
keep workstatus  noeducation primary secondary children oldchildren  Infertility marital_status age_group1 age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2  region1 region2 region3 region4 region5 region6 
bysort workstatus: outreg2 using tab1.tex, replace sum(log) eqkeep(mean sd)
restore



**********************************************************************************************************
*    Table 2: Differences in individual characteristics, between working, and non working women          *
**********************************************************************************************************


preserve
set more off
* Summary statistics
local varlist1 children  oldchildren marital_status secondary  area_residence2
local varlist2  age_group1 age_group2 age_group3  age_group4 age_group5 age_group6 age_group7 region1 region2 region3 region4 region5 region6 
local xvars `varlist1' `varlist2'

* Matrix for output
local z: word count `xvars' 

matrix T = J(`z', 6, . ) 
matrix rownames T = `xvars' 
matrix colnames T = n Working sdC  Notworking sdT pv 


* Estimation
foreach var in `varlist1' { 
	global `var'label: var label `var' 

reg `var' working  if `var' !=.
		g sample_`var' = (e(sample)==1) 
		local pr = 2*ttail(e(df_r),abs(_b[working]/_se[working])) 
		mat T[rownumb(T, "`var'"), colnumb(T,"pv")] = `pr' 
		
		sum `var' if sample_`var' == 1, d
	mat T[rownumb(T, "`var'"), colnumb(T,"n")] = `r(N)'
	
	sum `var' if sample_`var' == 1 & working == 1 
		mat T[rownumb(T, "`var'"), colnumb(T,"sdT")] = `r(sd)' 
		mat T[rownumb(T, "`var'"), colnumb(T,"Working")] = `r(mean)'
	sum `var' if sample_`var' == 1 & working == 0 
		mat T[rownumb(T, "`var'"), colnumb(T,"sdC")] = `r(sd)' 
		mat T[rownumb(T, "`var'"), colnumb(T,"Notworking")] = `r(mean)'
} 

foreach var in `varlist2' { 
	global `var'label: var label `var' 

reg `var' working if `var' !=.
		g sample_`var' = (e(sample)==1) 
		local pr = 2*ttail(e(df_r),abs(_b[working]/_se[working])) 
		mat T[rownumb(T, "`var'"), colnumb(T,"pv")] = `pr' 
		
		sum `var' if sample_`var' == 1, d
	mat T[rownumb(T, "`var'"), colnumb(T,"n")] = `r(N)'
	
	sum `var' if sample_`var' == 1 & working == 1 
		mat T[rownumb(T, "`var'"), colnumb(T,"sdT")] = `r(sd)' 
		mat T[rownumb(T, "`var'"), colnumb(T,"Working")] = `r(mean)'
	sum `var' if sample_`var' == 1 & working == 0 
		mat T[rownumb(T, "`var'"), colnumb(T,"sdC")] = `r(sd)' 
		mat T[rownumb(T, "`var'"), colnumb(T,"Notworking")] = `r(mean)'

}


* Formating Matrix
matrix list T

clear    
svmat T 
rename T1 n
rename T2 Working
rename T3 sdC
rename T4 Notworking
rename T5 sdT
rename T6 pv

gen var_name = "" 
gen var = ""
order var_name 

local i = 1 
foreach var in `xvars' { 
	replace var_name = "$`var'label" in `i' 
	replace var = "`var'" in `i' 
	local i = `i' + 1 
} 

label var var_name "Variable" 
label var n "N " 
label var Working "Mean Working" 
label var sdC "SD" 
label var Notworking "Mean Notworking" 
label var sdT "SD" 
label var pv "p-value" 

foreach var of varlist n { 
	replace `var' = round(`var', 1) 
	format `var' %9.0f 
} 

foreach var of varlist Notworking Working sdT sdC { 
	replace `var' = round(`var', 0.001) 
	format `var' %9.3f 
} 

foreach var of varlist pv { 
	replace `var' = round(`var', 0.001) 
	format `var' %9.3f 
}

gen stars = "" 
replace stars = "*" if pv <= 0.10 
replace stars = "**" if pv <= 0.05 
replace stars = "***" if pv <= 0.01 

drop var


capture erase "table.tex"
texsave  using "table.tex" , size(2) ///
width(0.6\textwidth) title("Summary Statistics") ///
hlines(2) replace varlabels frag location(ht)	
shellout using `"table.tex"'

restore




*********************************************
*    Table 3, Column 1: probit estimation   *
*********************************************


local s working
local d children
local y selfemployment
local z2 children  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2 UnemploymentRate region2 region3 region4 region5 region6 oldchildren
local z1 Infertility  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2   region2 region3 region4 region5 region6  
local x1 children  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2  region2 region3 region4 region5 region6 oldchildren

probit `y' `x1',  vce(cluster village)
boottest children  secondary, reps (9999) bootcluster(village)
margins, expression(normal(xb(selfemployment)+(1-children)*_b[children])-normal(xb(selfemployment)-children*_b[children]))
margins, expression(normal(xb(selfemployment)+(1-secondary)*_b[secondary])-normal(xb(selfemployment)-secondary*_b[secondary]))



*****************************************************************
* Table 3, column 2 and 3: PROBIT MODEL WITH SAMPLE SELECTION   *
*****************************************************************

preserve
set more off
local s working
local d children
local y selfemployment
local z2 children  secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2 UnemploymentRate region2 region3 region4 region5 region6 oldchildren
local z1 Infertility  secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2   region2 region3 region4 region5 region6  
local x1 children   secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2  region2 region3 region4 region5 region6 oldchildren

capture program drop myheckprob
program define myheckprob

	args lnf x1 x2 c21
	tempvar sp2 sp3 k1 k2
   quietly{
	gen double `k1' = 2*$ML_y1 - 1
	gen double `k2' = 2*$ML_y2 - 1
	tempname cf21 cf22 C1
	sum `c21', meanonly
	scalar `cf21' = r(mean)
	scalar `cf22' = sqrt(1 - `cf21'^2)
	mat `C1' = (1, 0 \ `cf21', `cf22')
	egen `sp3' = mvnp(`x1' `x2') if $ML_y1==1, chol(`C1') dr($dr) prefix(w) signs(`k1' `k2') adoonly
	gen `sp2' = 1-normal(`x1') if $ML_y1==0
	replace `lnf' = ln(`sp3') if $ML_y1==1
	replace `lnf' = ln(`sp2') if $ML_y1==0
   }
end


heckprobit `y' `x1', sel(`s'= `z2') 

mat b=e(b)
	mat b2 = b[1,1..17]
	mat coleq b2 = selfemployment
	mat b1 = b[1,18..35]
	mat coleq b1 = working
	mat b0 = b1, b2

mdraws, neq(2) dr(100) prefix(w) burn(10) replace
global dr = r(n_draws)


ml model lf myheckprob (`s': `s' = `z2') (`y': `y' = `x1') /c21 , missing vce(cluster village)


ml init b0

ml maximize

******marginal effect of CHILDREN on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+ [selfemployment]_b[children] + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons]  + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))

		
******marginal effect of EDUCATION on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+  smkids*[selfemployment]_b[children]+[selfemployment]_b[secondary] + marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons] +smkids*[selfemployment]_b[children]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))




*********marginal effect of CHILDREN on WORKING
margins, expression(normal([working]_b[_cons]+ [working]_b[children] + secondary*[working]_b[secondary]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren])  ///
		-normal([working]_b[_cons] + secondary*[working]_b[secondary]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren]))

******marginal effect of EDUCATION on WORKING
margins, expression(normal([working]_b[_cons]+  smkids*[working]_b[children]+[working]_b[secondary] + marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren])  ///
		-normal([working]_b[_cons] +smkids*[working]_b[children]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren]))

restore

******************************************************
** Table 3, column 4 and 5: Biprobit for endogeneity *                   *
******************************************************
preserve
set more off
local s working
local d children
local y selfemployment
local z2 children  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2 UnemploymentRate region2 region3 region4 region5 region6 oldchildren
local z1 Infertility  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2   region2 region3 region4 region5 region6  
local x1 children  secondary marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2  region2 region3 region4 region5 region6 oldchildren

biprobit (`d' = `z1') (`y' = `x1'), vce(cluster village)
boottest children secondary, reps (9999) bootcluster(village)

******marginal effect of CHILDREN on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+ [selfemployment]_b[children] + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons] + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))

******marginal effect of EDUCATION on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+  smkids*[selfemployment]_b[children]+[selfemployment]_b[secondary] + marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons] +smkids*[selfemployment]_b[children]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))

******marginal effect of INFERTILITY on CHILDREN
margins, expression(normal(xb(children)+(1-Infertility)*[children]_b[Infertility])-normal(xb(children)-Infertility*[children]_b[Infertility]))
restore


**********************************************************
* Table 4: BIVARIATE PROBIT MODEL WITH SAMPLE SELECTION  *
**********************************************************

preserve
set more off
local s working
local d children
local y selfemployment
local z2 children secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2 UnemploymentRate region2 region3 region4 region5 region6 oldchildren
local z1 Infertility secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2   region2 region3 region4 region5 region6  
local x1 children secondary  marital_status age_group2 age_group3  age_group4 age_group5 age_group6 age_group7  area_residence2  region2 region3 region4 region5 region6 oldchildren

*******
capture program drop myll

program define myll
	args lnf x1 x2 x3 c21 c31 c32
	tempvar sp2 sp3 k1 k2 k3
   quietly{
	gen double `k1' = 2*$ML_y1 - 1
	gen double `k2' = 2*$ML_y2 - 1
	gen double `k3' = 2*$ML_y3 - 1
	tempname cf21 cf22 cf31 cf32 cf33 C1 C2
	sum `c21', meanonly
	scalar `cf21' = r(mean)
	sum `c31', meanonly
	scalar `cf31' = r(mean)
	sum `c32', meanonly
	scalar `cf32' = r(mean)
	scalar `cf22' = sqrt(1 - `cf21'^2)
	scalar `cf33' = sqrt(1 - `cf31'^2 - `cf32'^2)
	mat `C1' = (1, 0, 0 \ `cf21', `cf22', 0 \ `cf31', `cf32', `cf33')
	mat `C2' = (1, 0 \ `cf21', `cf22')
	egen `sp3' = mvnp(`x1' `x2' `x3') if $ML_y1==1, chol(`C1') dr($dr) prefix(z) signs(`k1' `k2' `k3') adoonly
	egen `sp2' = mvnp(`x1' `x2') if $ML_y1==0, chol(`C2') dr($dr) prefix(z) signs(`k1' `k2') adoonly
	replace `lnf' = ln(`sp3') if $ML_y1==1
	replace `lnf' = ln(`sp2') if $ML_y1==0
   }
end


probit `s' `z2', cluster(`id')
mat b1=e(b)
mat coleq b1 = `s'



probit `d' `z1', cluster(`id')
mat b2 = e(b)
mat coleq b2 = `d'

probit `y' `x1', cluster(`id')
mat b3 = e(b)
mat coleq b3 = `y'


mat b0 = b1, b2, b3

mdraws, neq(3) dr(100) prefix(z) burn(10) replace

global dr = r(n_draws)

ml model lf myll (`s': `s' = `z2') (`d': `d' = `z1') (`y': `y' = `x1') /c21 /c31 /c32, missing vce(cluster village)

ml init b0

ml maximize

***** note: (nlcom run with Stata 14.1)
nlcom (r21: [c21]_b[_cons]) (r31: [c31]_b[_cons]) (r32: [c21]_b[_cons]*[c31]_b[_cons]+ sqrt(1- [c21]_b[_cons]^2)*[c32]_b[_cons])

*************
  

******marginal effect of CHILDREN on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+ [selfemployment]_b[children] + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons] + secondary*[selfemployment]_b[secondary]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))

******marginal effect of EDUCATION on SELFEMPLOYMENT
margins, expression(normal([selfemployment]_b[_cons]+  smkids*[selfemployment]_b[children]+[selfemployment]_b[secondary] + marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren])  ///
		-normal([selfemployment]_b[_cons] +smkids*[selfemployment]_b[children]+ marital_status*[selfemployment]_b[marital_status] + age_group2*[selfemployment]_b[age_group2] + age_group3*[selfemployment]_b[age_group3] ///
		+ age_group4*[selfemployment]_b[age_group4]+age_group5*[selfemployment]_b[age_group5]+age_group6*[selfemployment]_b[age_group6]+age_group7*[selfemployment]_b[age_group7]+area_residence2*[selfemployment]_b[area_residence2] ///
		+region2*[selfemployment]_b[region2]+region3*[selfemployment]_b[region3]+region4*[selfemployment]_b[region4]+region5*[selfemployment]_b[region5]+region6*[selfemployment]_b[region6]+oldchildren*[selfemployment]_b[oldchildren]))


*********marginal effect of CHILDREN on WORKING
margins, expression(normal([working]_b[_cons]+ [working]_b[children] + secondary*[working]_b[secondary]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren])  ///
		-normal([working]_b[_cons] + secondary*[working]_b[secondary]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren]))

******marginal  EDUCATION in WORKING
margins, expression(normal([working]_b[_cons]+  smkids*[working]_b[children]+[working]_b[secondary] + marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren])  ///
		-normal([working]_b[_cons] +smkids*[working]_b[children]+ marital_status*[working]_b[marital_status] + age_group2*[working]_b[age_group2] + age_group3*[working]_b[age_group3] ///
		+ age_group4*[working]_b[age_group4]+age_group5*[working]_b[age_group5]+age_group6*[working]_b[age_group6]+age_group7*[working]_b[age_group7]+area_residence2*[working]_b[area_residence2]+UnemploymentRate*[working]_b[UnemploymentRate] ///
		+region2*[working]_b[region2]+region3*[working]_b[region3]+region4*[working]_b[region4]+region5*[working]_b[region5]+region6*[working]_b[region6]+oldchildren*[working]_b[oldchildren]))


************ marginal effect of Infertility on CHILDREN
 margins, expression(normal(xb(children)+(1-Infertility)*[children]_b[Infertility])-normal(xb(children)-Infertility*[children]_b[Infertility]))

restore


 
****************************************************************************************************************
**      TABLE 5 :  Estimated error correlation coefficients and test for endogeneity and selection             *
****************************************************************************************************************
***** note: (nlcom run with Stata 14.1)
preserve
nlcom (r21: [c21]_b[_cons]) (r31: [c31]_b[_cons]) (r32: [c21]_b[_cons]*[c31]_b[_cons]+ sqrt(1- [c21]_b[_cons]^2)*[c32]_b[_cons]), post
test r21 r32
test r21 r31
restore



*************************************************
*     TABLE 6 :     FOUR EQUATION ESTIMATION    *
*************************************************


preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2, family(nbinomial)  vce(cluster village) 
boottest   reform_high_intensity  , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren res res2,sel(working= children education  marital_status  age area_residence2  oldchildren  UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
margins, dydx(*) predict(psel)
restore



*************************************************
*        ROBUSTNESS CHECKS: Table 7, 8, 9, 10   *                    *
*************************************************

***********************************************************************************
*       Table 7 : Controlling for childmarriage and earlymotherhood               *
***********************************************************************************



preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity, family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2 , family(nbinomial)  vce(cluster village) 
boottest   reform_high_intensity  , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood  UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
margins, dydx(*) predict(psel)
restore



****************************************************************
*  Table 8 : Alternative clustering of standard errors         *
****************************************************************


****clustering at states level
preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster sstate)
boottest Infertility, reps (9999) bootcluster(sstate)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2 , family(nbinomial)  vce(cluster sstate) 
boottest   reform_high_intensity  , reps (9999) bootcluster(sstate)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood  res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood   UnemploymentRate res res2) vce(cluster sstate) 
boottest children  education, reps (9999) bootcluster(sstate)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore


***** clustering at state x birth cohort level
egen agestate= group(age_group sstate)
preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster agestate)
boottest Infertility, reps (9999) bootcluster(agestate)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status area_residence2 , family(nbinomial)  vce(cluster agestate) 
boottest   reform_high_intensity  , reps (9999) bootcluster(agestate)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood  res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood   UnemploymentRate res res2) vce(cluster agestate) 
boottest children  education, reps (9999) bootcluster(agestate)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore



**** clustering at region level
preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster region)
boottest Infertility, reps (9999) bootcluster(region)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status area_residence2 , family(nbinomial)  vce(cluster region) 
boottest   reform_high_intensity  , reps (9999) bootcluster(region)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood  res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood   UnemploymentRate res res2) vce(cluster region) 
boottest children  education, reps (9999) bootcluster(region)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore


******clustering at region x birth cohort level
egen ageregion= group(age_group region)
preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster ageregion)
boottest Infertility, reps (9999) bootcluster(ageregion)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status area_residence2 , family(nbinomial)  vce(cluster ageregion) 
boottest   reform_high_intensity  , reps (9999) bootcluster(ageregion)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood  res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood   UnemploymentRate res res2) vce(cluster ageregion) 
boottest children  education, reps (9999) bootcluster(ageregion)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore


******clustering at Ethnicity x birth cohort level
egen ageetnicity= group(age_group v131)
preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster ageetnicity)
boottest Infertility, reps (9999) bootcluster(ageetnicity)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status area_residence2 , family(nbinomial)  vce(cluster ageetnicity) 
boottest   reform_high_intensity  , reps (9999) bootcluster(ageetnicity)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood  res res2,sel(working= children education  marital_status  age area_residence2  oldchildren childmarriage earlymotherhood   UnemploymentRate res res2) vce(cluster ageetnicity) 
boottest children  education, reps (9999) bootcluster(ageetnicity)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore



*********************************************************
**    Table 9: Robustness check - Using the 2013 NDHS   *
*********************************************************


set more off
clear all
clear matrix
clear mata

set mem 1000m
set matsize 5000
set maxvar 32000

append using "C:\Users\Bago\Desktop\BD_EDCC\Data\ndhs3.dta"
rename v208 kidunder5
rename v106 education_level
rename v133 education
replace v502 =0 if v502==2
rename v502 marital_status
rename v013 age_group
rename v012 age
rename v025 area_residence
rename v732 saisonal
rename v201 totalchildren
rename v136 hhsize
rename v101 region
rename v3a08e infertility1
rename v203 daughters
rename v202 sons
rename v505 otherwives
rename v001 village
replace otherwives=. if otherwives==98|otherwives==99

gen oldchildren = daughters+sons-kidunder5
gen secondary=.
replace secondary=1 if education_level>=2
replace secondary=0 if education_level==0|education_level==1

gen primary=.
replace primary=1 if education_level==1
replace primary=0 if education_level==0|education_level>=2

gen infertility2=.
replace infertility2=1 if v605==7
replace infertility2=0 if v605!=7  &  v605!=. 

gen Infertility=.
replace  Infertility=0 if infertility2==0 & infertility1==0
replace  Infertility=1 if infertility2==1 | infertility1==1


gen children=.
replace children=1 if kidunder5>0
replace children=0 if kidunder5==0


gen working=.
replace working=0 if v731==0
replace working=1 if v731==2|v731==3|v731==1
label var working " Working "
label define labetype1 0 "not working" 1 "working"
label value working labetype1

gen selfemployment = .
replace selfemployment= 0 if (v719==1|v719==2)
replace selfemployment= 1 if  v719==3
label var selfemployment " self employment" 
label define labtype  1 " self " 0 " wage " 
label value selfemployment labtype
replace working=. if (selfemployment==. & working==1)


gen EmploymentRate=.
local N = _N
forvalues num = 1/`N'{
quiet sum working if (age_group == age_group[`num'] & region == region[`num'])
quiet replace EmploymentRate = r(mean) in `num'
}
gen UnemploymentRate=(1-EmploymentRate)


gen id = _n
local id id

gen touse = !missing(Infertility, working, kidunder5, children,  secondary,  marital_status, age_group, UnemploymentRate, area_residence, region)
drop if touse!=1
tabulate age_group, generate (age_group)
tabulate region, generate (region)
tabulate area_residence, generate (area_residence)
gen smkids=children

gen reform =.
replace reform =1 if age <= 20
replace reform =0 if age >20

gen high_intensity=.
replace high_intensity = 1 if region ==1|region==2|region==4|region==5
replace high_intensity = 0 if region ==3|region==6
gen reform_high_intensity =  reform*high_intensity

rename v212 agefirstbirth
rename v511 Ageat1stUnion
gen childmarriage = .
replace childmarriage = 1 if Ageat1stUnion <= 18 
replace childmarriage = 0 if Ageat1stUnion > 18

gen earlymotherhood=.
replace earlymotherhood=1 if agefirstbirth<18
replace earlymotherhood=0 if agefirstbirth>=18 

preserve
glm children Infertility  marital_status age oldchildren area_residence2 high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity  area_residence2 , family(nbinomial)  vce(cluster village) 
boottest   reform_high_intensity  , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status age  area_residence2  oldchildren childmarriage earlymotherhood res res2,sel(working= children education  marital_status  age area_residence2  oldchildren  childmarriage earlymotherhood  UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
margins, dydx(*) predict(psel)
restore




************************************************
**      Table 10: Does ethnicity matter?       *
************************************************


set more off
clear all
clear matrix
clear mata

set mem 1000m
set matsize 5000
set maxvar 32000

append using "C:\Users\Bago\Desktop\BD_EDCC\Data\ndhs8.dta"
gen round=2018
rename v131 v131_2018
append using "C:\Users\Bago\Desktop\BD_EDCC\Data\ndhs3.dta"
replace round=2013 if round==.
rename v208 kidunder5
rename v106 education_level
rename v133 education
replace v502 =0 if v502==2
rename v502 marital_status
rename v013 age_group
rename v012 age
rename v025 area_residence
rename v732 saisonal
rename v201 totalchildren
rename v136 hhsize
rename v101 region
rename v3a08e infertility1
rename v203 daughters
rename v202 sons
rename v505 otherwives
rename v001 village
replace otherwives=. if otherwives==98|otherwives==99
rename v212 agefirstbirth
rename v511 Ageat1stUnion

gen oldchildren = daughters+sons-kidunder5
gen secondary=.
replace secondary=1 if education_level>=2
replace secondary=0 if education_level==0|education_level==1

gen primary=.
replace primary=1 if education_level==1
replace primary=0 if education_level==0|education_level>=2

gen infertility2=.
replace infertility2=1 if v605==7
replace infertility2=0 if v605!=7  &  v605!=. 

gen Infertility=.
replace  Infertility=0 if infertility2==0 & infertility1==0
replace  Infertility=1 if infertility2==1 | infertility1==1


gen children=.
replace children=1 if kidunder5>0
replace children=0 if kidunder5==0


gen working=.
replace working=0 if v731==0
replace working=1 if v731==2|v731==3|v731==1
label var working " Working "
label define labetype1 0 "not working" 1 "working"
label value working labetype1

gen selfemployment = .
replace selfemployment= 0 if (v719==1|v719==2)
replace selfemployment= 1 if  v719==3
label var selfemployment " self employment" 
label define labtype  1 " self " 0 " wage " 
label value selfemployment labtype
replace working=. if (selfemployment==. & working==1)


gen EmploymentRate=.
local N = _N
forvalues num = 1/`N'{
quiet sum working if (age_group == age_group[`num'] & region == region[`num'])
quiet replace EmploymentRate = r(mean) in `num'
}
gen UnemploymentRate=(1-EmploymentRate)

gen id = _n
local id id

gen touse = !missing(Infertility, working, kidunder5, children,  secondary,  marital_status, age_group, UnemploymentRate, area_residence, region)
drop if touse!=1
tabulate age_group, generate (age_group)
tabulate region, generate (region)
tabulate area_residence, generate (area_residence)
gen smkids=children


gen childmarriage = .
replace childmarriage = 1 if Ageat1stUnion <= 18 
replace childmarriage = 0 if Ageat1stUnion > 18

gen reform =.
replace reform =1 if age <=  23
replace reform =0 if age >23

gen high_intensity=.
replace high_intensity = 0 if region ==1|region==2|region==4|region==5
replace high_intensity = 1 if region ==3|region==6

gen reform_high_intensity =  reform*high_intensity

gen earlymotherhood=.
replace earlymotherhood=1 if agefirstbirth<18
replace earlymotherhood=0 if agefirstbirth>=18 

preserve
keep if (v131_2018==130 & round==2018) | (v131==6 & round==2013) //haussa
glm children Infertility  marital_status area_residence2  high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2, family(nbinomial)  vce(cluster village)
boottest  reform reform_high_intensity high_intensity , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment children  education  marital_status  oldchildren res res2,sel(working= children education marital_status   oldchildren UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore

preserve
keep if (v131_2018==298 & round==2018) |(v131==10 & round==2013) //Yoruba
glm children Infertility  marital_status area_residence2  high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2, family(nbinomial)  vce(cluster village)
boottest  reform reform_high_intensity high_intensity , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
outreg2 using tab1.tex, append ctitle(Model 4)
heckprobit selfemployment  children  education  marital_status  oldchildren  res res2,sel(working= children education marital_status  oldchildren    UnemploymentRate res res2) vce(cluster village) 
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore


preserve
keep if (v131_2018==138 & round==2018) |(v131==6 & round==2013) //Ibo
glm children Infertility  marital_status area_residence2  high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2, family(nbinomial)  vce(cluster village)
boottest  reform reform_high_intensity high_intensity , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment  children  education  marital_status  oldchildren  res res2,sel(working= children education marital_status  oldchildren    UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore


preserve
keep if (round==2018 & (v131_2018 !=130 & v131_2018 !=138 & v131_2018 !=298 & v131_2018 !=.)) | (round==2013 & (v131 !=6 & v131 !=10 & v131 !=3 & v131 !=.)) //Other etnic groups
glm children Infertility  marital_status area_residence2  high_intensity , family(binomial) link(probit) vce(cluster village)
boottest Infertility, reps (9999) bootcluster(village)
predict res, anscombe
margins, dydx(*)
glm education reform reform_high_intensity high_intensity marital_status  area_residence2, family(nbinomial)  vce(cluster village)
boottest  reform reform_high_intensity high_intensity , reps (9999) bootcluster(village)
predict res2,res
margins, dydx(*)
heckprobit selfemployment  children  education  marital_status  oldchildren  res res2,sel(working= children education marital_status  oldchildren    UnemploymentRate res res2) vce(cluster village) 
boottest children  education, reps (9999) bootcluster(village)
margins, dydx(*)
quiet margins, dydx(*) predict(psel)
restore



*===================================THE END==========================================


